# Read the temperature observations prepared for model fitting.
# Columns include: area, source, db, id (chr); year, temp, yday, month,
# day, VkDag, stn_nr, section_nr (dbl); date (date) — per read_csv's spec.
d <- read_csv(paste0(home, "/output/temp-data-for-fitting.csv"))
Fit the temperature model
Code
# Convert grouping variables to factors for the model formula.
d <- d |>
  mutate(
    area = as.factor(area),
    source_f = as.factor(source),
    year_f = as.factor(year)
  )

# Fit a Student-t GLMM of temperature with an area-by-year interaction,
# a data-source effect, and an area-specific cyclic smooth over day of year.
# Spatial/spatiotemporal random fields are disabled (non-spatial model);
# cyclic knots at 0.5/364.5 join Dec 31 back to Jan 1.
m <- sdmTMB(
  temp ~ area * year_f + source_f + s(yday, by = area, bs = "cc"),
  data = d,
  family = student(df = 6),
  spatial = "off",
  spatiotemporal = "off",
  knots = list(yday = c(0.5, 364.5)),
  control = sdmTMBcontrol(newton_loops = 1)
)
Check fit
Code
# Convergence diagnostics for the fitted model (gradients, Hessian, SEs).
sanity(m)

# Draw MCMC samples at the MLE for randomized-quantile residuals;
# a single post-warmup draw (201 iter, 200 warmup) is used here.
samps <- sdmTMBextra::predict_mle_mcmc(m, mcmc_iter = 201, mcmc_warmup = 200)
mcmc_res <- residuals(m, type = "mle-mcmc", mcmc_samples = samps)

# QQ plot of the MCMC residuals against the theoretical distribution.
ggplot(d, aes(sample = mcmc_res)) +
  stat_qq() +
  stat_qq_line() +
  labs(y = "Sample Quantiles", x = "Theoretical Quantiles") +
  theme(aspect.ratio = 1)
Code
# Save the residual QQ plot for the supplement.
ggsave(paste0(home, "/figures/supp/qq_temp.pdf"),
       width = 11, height = 11, units = "cm")
Predict
Code
# Build a full prediction grid: every yday x area x year combination,
# predicting as if all observations came from the "logger" source.
nd <- data.frame(
  expand.grid(
    yday = seq(min(d$yday), max(d$yday), by = 1),
    area = unique(d$area),
    year = unique(d$year)
  )
) |>
  mutate(source = "logger") |>
  mutate(
    id = paste(year, area, sep = "_"),
    source_f = as.factor(source),
    year_f = as.factor(year)
  )

# Point predictions (link-scale estimate) from the fitted model.
nd$pred <- predict(m, newdata = nd)$est
In order to have a lower temperature in pre-nuclear times (without any data to inform that), we can use the nearby areas, so FM informs BT prior to the nuclear period.
Code
# Drop the heated-area predictions for years before warming began
# (BT before 1980, SI_HA before 1972) — no data informs those years.
nd <- nd |>
  mutate(
    keep = "Y",
    keep = ifelse(area == "BT" & year < 1980, "N", keep),
    keep = ifelse(area == "SI_HA" & year < 1972, "N", keep)
  ) |>
  filter(keep == "Y")

# Take the matching years from the unheated reference areas instead.
nd_sub <- nd |>
  mutate(
    keep = "N",
    keep = ifelse(area == "FM" & year < 1980, "Y", keep),   # use FM instead of BT
    keep = ifelse(area == "SI_EK" & year < 1972, "Y", keep) # use SI_EK instead of SI_HA
  ) |>
  filter(keep == "Y")

# Relabel the reference rows as the heated areas they stand in for.
# (Only FM and SI_EK rows remain here, so the ifelse is a two-way split.)
nd_sub <- nd_sub |>
  mutate(area = ifelse(area == "FM", "BT", "SI_HA"))

# Combine: heated-area predictions plus their pre-warming substitutes.
nd <- bind_rows(nd, nd_sub) |>
  select(-keep)
Keep track of the years for which we have cohorts for matching with cohort data
Code
# Read the growth data to find, per area, the first cohort with usable data.
gdat <- readr::read_csv(paste0(home, "/data/for_analysis/dat.csv"))

# Require more than 10 fish per area x cohort x back-calculation age.
gdat$area_cohort_age <- as.factor(paste(gdat$area, gdat$cohort, gdat$age_bc))
gdat <- gdat |>
  group_by(area_cohort_age) |>
  filter(n() > 10)

# First cohort per area among fish caught at age > 3.
# NOTE: the summary column is named `min`, shadowing base::min inside
# later data-masked expressions — kept for compatibility with downstream code.
gdat <- gdat |>
  filter(age_catch > 3) |>
  group_by(area) |>
  summarise(min = min(cohort)) |>
  arrange(min)

# Flag prediction years at or after the first cohort year per area.
# (The unexecuted nsim code later uses `year > min`; `>=` is used here —
# confirm which cutoff is intended.)
nd <- left_join(nd, gdat, by = "area") |>
  mutate(growth_dat = ifelse(year >= min, "Y", "N"))
Plot predictions
Code
# Heatmap of predicted SST over day-of-year and year, one panel per area,
# restricted to the years used for growth modelling.
nd |>
  filter(growth_dat == "Y") |>
  ggplot(aes(yday, y = year, fill = pred, color = pred)) +
  geom_raster() +
  facet_wrap(~area, ncol = 3) +
  scale_fill_viridis(option = "magma") +
  scale_color_viridis(option = "magma") +
  labs(
    x = "Yearday", y = "Year",
    color = "Predicted SST (°C)", fill = "Predicted SST (°C)"
  )
Detailed exploration of predictions
Code
# Loop through all areas: plot temperature as a function of yday,
# color observations by data source, facet by year, and save one PDF per area.
for (i in unique(nd$area)) {
  plotdat <- nd |> filter(area == i)

  print(
    ggplot(plotdat, aes(yday, pred, linetype = "Model prediction (logger)")) +
      scale_color_brewer(palette = "Accent") +
      facet_wrap(~year) +
      # Overlay raw observations; years before the first predicted year are
      # dropped (strict `>` — the first year's data is excluded; confirm intent).
      geom_point(
        data = filter(d, area == i & year > min(plotdat$year)),
        aes(yday, temp, color = source),
        size = 0.75
      ) +
      geom_line() +
      labs(title = paste("Area = ", i), color = "", linetype = "") +
      guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
      theme_sleek(base_size = 8) +
      theme(
        legend.position = c(0.7, 0.03),
        legend.direction = "horizontal",
        legend.spacing.y = unit(-0.3, "cm")
      ) +
      labs(x = "Day of the year", y = "Predicted SST (°C)")
  )

  ggsave(paste0(home, "/figures/supp/temp_pred_yday_area_", i, ".pdf"),
         width = 17, height = 17, units = "cm")
}
# Observed mean growing-season temperature (Mar 1 - Oct 1) per
# year x area x source; yday() gives the day-of-year cutoffs.
dsum <- d |>
  filter(yday %in% c(yday("2000-03-01"):yday("2000-10-01"))) |>
  group_by(year, area, source) |>
  summarise(temp = mean(temp)) |>
  mutate(type = "data")

# Model-predicted mean growing-season temperature per area x year,
# restricted to growth-data years and the logger source.
preds <- nd |>
  filter(
    growth_dat == "Y" &
      yday %in% c(yday("2000-03-01"):yday("2000-10-01")) &
      source == "logger"
  ) |>
  group_by(area, year) |>
  summarise(temp = mean(pred)) |>
  mutate(type = "model")

# Predicted series (lines) against observed means (points), by area.
ggplot(preds, aes(year, temp)) +
  geom_point(data = dsum, aes(year, temp, color = source),
             size = 0.75, alpha = 0.75) +
  scale_color_brewer(palette = "Accent") +
  geom_line(linewidth = 0.5, color = "grey20") +
  facet_wrap(~area)
Make final plot
Code
# Order the panels by mean predicted growing-season temperature (warm first).
# NOTE: `order` shadows base::order in this script's environment.
order <- preds |>
  group_by(area) |>
  summarise(mean_temp = mean(temp)) |>
  arrange(desc(mean_temp))
order

# Save plot
topt <- 19.6 # Overall t_opt from 02-fit-vbge.qmd! Update if needed

ggplot(preds, aes(year, temp, color = temp)) +
  facet_wrap(~factor(area, levels = order$area)) +
  geom_hline(yintercept = topt, linewidth = 0.3, linetype = 2, color = "grey") +
  geom_line() +
  labs(x = "Year", y = "Model predicted average growing season temperature") +
  scale_color_viridis(option = "magma", name = "Area") +
  guides(color = "none")
Code
# Print first/last predicted year per area, then save the final figure.
preds |>
  group_by(area) |>
  summarise(
    min = min(year),
    max = max(year)
  ) |>
  arrange(min)

ggsave(paste0(home, "/figures/growing_season_temp.pdf"),
       width = 17, height = 17, units = "cm")
Code
# Save prediction df for downstream growth modelling.
write_csv(preds, paste0(home, "/output/gam_predicted_temps.csv"))
If we want to get uncertainty, we can use nsim instead; this simulates from the linear predictor using the inverse precision matrix, which is a fast way to get a distribution of samples from which we can take e.g. quantiles and means. However, it’s still slow, so the code below isn’t executed yet.
Code
# Uncertainty via simulation from the joint precision matrix (not executed yet;
# slow). Builds the same prediction grid, draws `nsim` samples of the linear
# predictor, and summarises growing-season quantiles per area x year.
nd_sim <- data.frame(
  expand.grid(
    yday = seq(min(d$yday), max(d$yday), by = 1),
    area = unique(d$area),
    year = unique(d$year)
  )
) |>
  mutate(source = "logger") |>
  mutate(
    id = paste(year, area, sep = "_"),
    source_f = as.factor(source),
    year_f = as.factor(year)
  )

# Trim! Keep only growth-data years and the (May-Sep) growing season.
# NOTE(review): this window differs from the Mar-Oct window used for `preds`
# earlier — confirm which growing-season definition is intended.
nd_sim <- left_join(nd_sim, gdat, by = "area")
nd_sim <- nd_sim |>
  # FIX: use `year >= min` to match the main pipeline (was `year > min`,
  # which silently dropped each area's first cohort year).
  mutate(growth_dat = ifelse(year >= min, "Y", "N")) |>
  filter(growth_dat == "Y") |>
  filter(yday %in% c(yday("2000-05-01"):yday("2000-9-01"))) |>
  mutate(area = as.factor(area))

# Predict! nsim draws from the linear predictor.
nsim <- 500
m_pred_sims <- predict(m, newdata = nd_sim, nsim = nsim)

# Join sims with prediction data: one row per grid cell x simulation draw.
nd_sim_long <- cbind(nd_sim, as.data.frame(m_pred_sims)) |>
  pivot_longer(c((ncol(nd_sim) + 1):(nsim + ncol(nd_sim))))

# Summarize sims over growing season: 10/50/90% quantiles per year x area.
sum_pred_gs <- nd_sim_long |>
  ungroup() |>
  group_by(year, area) |>
  summarise(
    lwr = quantile(value, prob = 0.1),
    est = quantile(value, prob = 0.5),
    upr = quantile(value, prob = 0.9)
  ) |>
  ungroup()

# In order to have a lower temperature in before-nuclear times (without any
# data to inform that), we can use the nearby areas..
# FIX: trim the simulation summaries themselves. The original overwrote
# `sum_pred_gs` with trimmed `preds`, discarding every lwr/est/upr quantile
# this block just computed.
sum_pred_gs_trim <- sum_pred_gs |>
  mutate(
    keep = "Y",
    keep = ifelse(area == "BT" & year < 1980, "N", keep),
    keep = ifelse(area == "SI_HA" & year < 1972, "N", keep)
  ) |>
  filter(keep == "Y")

sum_pred_gs_sub <- sum_pred_gs |>
  mutate(
    keep = "N",
    keep = ifelse(area == "FM" & year < 1980, "Y", keep),   # use FM instead of BT
    keep = ifelse(area == "SI_EK" & year < 1972, "Y", keep) # use SI_EK instead of SI_HA
  ) |>
  filter(keep == "Y")

# Now change the labels to BT and SI_EK...
sum_pred_gs_sub <- sum_pred_gs_sub |>
  mutate(area = ifelse(area == "FM", "BT", "SI_HA"))

# Bind rows and plot only the temperature series we will use for growth
# modelling. (No `type` column here — that belonged to `preds`.)
sum_pred_gs <- bind_rows(sum_pred_gs_trim, sum_pred_gs_sub) |>
  select(-keep)

# Order areas by mean of the median (est) series, warmest first.
# (FIX: the quantile summaries have `est`, not `temp`.)
order <- sum_pred_gs |>
  group_by(area) |>
  summarise(mean_temp = mean(est)) |>
  arrange(desc(mean_temp))